
// STEP 0: PREPARE THE BASELINE SURVEY AND ENDLINE SURVEY 
******************************************************************
* open list of students asssigned as treated 
clear 
import excel "$randomization/final/randomization_STEM_mainlist_9Oct2021.xlsx", sheet("Sheet1") firstrow

* keep relevant information 
keep menteeid studentid 
tempfile treatlist10 
save `treatlist10', replace

clear 
import excel using "$randomization/randomization_school_STEM_list.xlsx", clear firstrow 
keep schoolname treat* geoprisize
rename treat_273293 treatschool
tempfile treatschool10
save `treatschool10', replace

* open processed baseline survey 
use "$clean/grade910_baseline_reachable_wgrade.dta", clear 
merge 1:1 studentid using `treatlist10'
g treatstudent=_merge==3
drop _merge
lab var treatstudent "1=student assigned as treated"

merge m:1 schoolname using `treatschool10', keep(matched) nogen

* keep only grade 10 students
* or students from schools with missing grade information 
keep if grade==10 | inlist(schoolname, "lamgong middle secondary school", "samtengang central school", "wangbama central school")

* keep relevant information for now 
keep name sex b_* schoolname studentid email* phone responseid treatstudent treatschool geoprisize
rename b_month month 
g b_month=.
replace b_month=1 if month=="January"
replace b_month=2 if month=="February"
replace b_month=3 if month=="March"
replace b_month=4 if month=="April"
replace b_month=5 if month=="May"
replace b_month=6 if month=="June"
replace b_month=7 if month=="July"
replace b_month=8 if month=="August"
replace b_month=9 if month=="September"
replace b_month=10 if month=="October"
replace b_month=11 if month=="November"
replace b_month=12 if month=="December"
drop month
g bday_b = b_year*10^4 + b_month*10^2 + b_day
tostring bday_b, replace

rename responseid responseid_b

* format name and phone number
rename name name_original_b
g name = name_original_b
lab var name_original_b "name reported at baseline survey"
replace name = trim(itrim(lower(name)))
replace name = subinstr(name," ","",.)  
replace name = subinstr(name,",","",.)   //Removes comma (,)
replace name = subinstr(name,"'","",.)   //Removes apostrophe (')
replace name = subinstr(name,".","",.)   //Removes dot (.) 
replace name = subinstr(name,"/","",.)   //Removes slash (/)
replace name = subinstr(name,"-","",.)   //Removes dash (-)
replace name = subinstr(name,"(","",.)   //Removes opening parentheses
replace name = subinstr(name,")","",.)   //Removes closing parentheses
replace name = subinstr(name,"mynameis","",.)   //Removes irrelevant info
replace name = subinstr(name,"@educationgovbt","",.)   //Removes irrelevant info
replace name = subinstr(name,"@gmailcom","",.)   //Removes irrelevant info
format name %25s

replace phone = strlower(phone) 
replace phone = subinstr(phone,"na","",.)  
rename phone phone_b

* save as new dataset 
save "$temp/g10_p0b.dta", replace 




// STEP 1: MATCHING USING SCHOOL + ID + F/NAME 
******************************************************************
gl criteria_step1 schoolname studentid 

use "$temp/g10_p0b.dta", clear 
foreach i in name sex stream phone {
	cap rename `i' `i'_b
}
merge 1:1 $criteria_step1 using "$temp/g10_p0e1.dta"
foreach i in name sex stream phone {
	cap rename `i' `i'_e
}
matchit name_b name_e, g(namescore) 

// perfectly matched
preserve
keep if _merge==3 & (namescore>=0.4 | inlist(studentid, "201.00328.11.0295", "201.00272.11.0018"))
drop _merge 
save "$temp/g10_p1m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | ( _merge==3 & ~(namescore>=0.4 | inlist(studentid, "201.00328.11.0295", "201.00272.11.0018")))
keep $criteria_step1 status_e
merge 1:1 $criteria_step1 using "$temp/g10_p0b.dta", nogen keep(matched)
rename studentid studentid_b
save "$temp/g10_p1b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 |  (_merge==3 & ~(namescore>=0.4 | inlist(studentid, "201.00328.11.0295", "201.00272.11.0018")))
keep $criteria_step1 status_e
merge 1:1 $criteria_step1 using "$temp/g10_p0e1.dta", nogen keep(matched) 
append using "$temp/g10_p0e2.dta"
append using "$temp/g10_p0e3.dta"
rename studentid studentid_e
save "$temp/g10_p1e.dta", replace
count
restore




// STEP 2: MATCHING USING SCHOOL + F/NAME + BIRTHDAY
******************************************************************
gl criteria_step2 schoolname b_day b_month b_year

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g10_p1`dataset'.dta", clear
duplicates tag $criteria_step2, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g10_doublecheck_p1`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g10_p1`dataset'_unique.dta", replace 
}


use "$temp/g10_p1b_unique.dta", clear 
foreach i in studentid name sex stream phone {
	cap rename `i' `i'_b
}
merge 1:1 $criteria_step2 using "$temp/g10_p1e_unique.dta"
foreach i in studentid name sex stream phone {
	cap rename `i' `i'_e
}
matchit name_b name_e, g(namescore)

// perfectly matched
preserve
keep if _merge==3 & (namescore>=0.61 | inlist(studentid_b, "201.00464.11.0021", "201.00024.11.0222"))
drop _merge 
save "$temp/g10_p2m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & ~(namescore>=0.61 | inlist(studentid_b, "201.00464.11.0021", "201.00024.11.0222")))
keep $criteria_step2 status_e 
merge 1:1 $criteria_step2 using "$temp/g10_p1b_unique.dta", nogen keep(matched)
cap rename studentid studentid_b
append using "$temp/g10_doublecheck_p1b.dta"
save "$temp/g10_p2b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & ~(namescore>=0.61 | inlist(studentid_b, "201.00464.11.0021", "201.00024.11.0222")))
keep $criteria_step2 status_e
merge 1:1 $criteria_step2 using "$temp/g10_p1e_unique.dta", nogen keep(matched)
cap rename studentid studentid_e
append using "$temp/g10_doublecheck_p1e.dta"
save "$temp/g10_p2e.dta", replace
count
restore



// STEP 3: MATCHING USING SCHOOL + BIRTHDAY + PHONE 
**********************************************************************
gl criteria_step3 schoolname b_day b_month b_year phone 

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g10_p2`dataset'.dta", clear
rename phone_`dataset' phone
duplicates tag $criteria_step3, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g10_doublecheck_p2`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g10_p2`dataset'_unique.dta", replace 
}

use "$temp/g10_p2b_unique.dta", clear 
foreach i in studentid name sex {
	cap rename `i' `i'_b
}
merge 1:1 $criteria_step3 using "$temp/g10_p2e_unique.dta" 
foreach i in studentid name sex {
	cap rename `i' `i'_e
}
matchit name_original_b name_original_e, g(namescore)

// perfectly matched
preserve
keep if _merge==3 & ~missing(phone) & namescore>=0.6
drop _merge 
save "$temp/g10_p3m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | ( _merge==3 & (missing(phone) | namescore<0.6))
keep $criteria_step3 status_e
merge 1:1 $criteria_step3 using "$temp/g10_p2b_unique.dta", nogen keep(matched)
cap rename studentid studentid_b
cap rename name name_b
append using "$temp/g10_doublecheck_p2b.dta"
save "$temp/g10_p3b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | ( _merge==3 & (missing(phone) | namescore<0.6))
keep $criteria_step3 status_e
merge 1:1 $criteria_step3 using "$temp/g10_p2e_unique.dta", nogen keep(matched)
cap rename studentid studentid_e
cap rename name name_e
append using "$temp/g10_doublecheck_p2e.dta"
save "$temp/g10_p3e.dta", replace
count
restore


// STEP 4: MATCHING USING SCHOOL + SEX  + PHONE + FUZZYNAME/BIRTHDAY
*******************************************************************
gl criteria_step4 schoolname sex phone

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g10_p3`dataset'.dta", clear
duplicates tag $criteria_step4, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g10_doublecheck_p3`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g10_p3`dataset'_unique.dta", replace 
}

use "$temp/g10_p3b_unique.dta", clear 
merge 1:1 $criteria_step4 using "$temp/g10_p3e_unique.dta" 
matchit name_b name_e, g(namescore) 
matchit bday_b bday_e, g(bdayscore) 

// perfectly matched 
preserve 
keep if _merge==3 & namescore>0.8
drop _merge 
save "$temp/g10_p4m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & namescore<=0.8)
keep $criteria_step4 status_e
merge 1:1 $criteria_step4 using "$temp/g10_p3b_unique.dta", nogen keep(matched)
drop  b_*
cap drop *_e
cap rename studentid studentid_e
append using "$temp/g10_doublecheck_p3b.dta"
save "$temp/g10_p4b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & namescore<=0.8)
keep $criteria_step4 status_e
merge 1:1 $criteria_step4 using "$temp/g10_p3e_unique.dta", nogen keep(matched)
drop  b_*
cap drop *_b
append using "$temp/g10_doublecheck_p3e.dta"
replace name_e = name if ~missing(name)
save "$temp/g10_p4e.dta", replace
count
restore 


// STEP 5: MATCHING STUDENTID + SEX + PHONE + FUZZY NAME/BIRTHDAY 
// HAPPENDS IF STUDENTS TRANSFER 
*********************************************************************
gl criteria_step5 studentid sex phone

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g10_p4`dataset'.dta", clear
cap rename studentid_`dataset' studentid
rename schoolname schoolname_`dataset'
duplicates tag $criteria_step5, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g10_doublecheck_p4`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g10_p4`dataset'_unique.dta", replace 
}

use "$temp/g10_p4b_unique.dta", clear 
merge 1:1 $criteria_step5 using "$temp/g10_p4e_unique.dta" 
matchit name_b name_e, g(namescore) 
matchit bday_b bday_e, g(bdayscore) 

// perfectly matched 
preserve 
keep if _merge==3 & (namescore>0.7 | bdayscore>0.7)
drop _merge 
save "$temp/g10_p5m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & ~(namescore>0.7 | bdayscore>0.7))
keep $criteria_step5 status_e
merge 1:1 $criteria_step5 using "$temp/g10_p4b_unique.dta", nogen keep(matched)
drop  b_* 
cap drop *_e
append using "$temp/g10_doublecheck_p4b.dta"
save "$temp/g10_p5b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & ~(namescore>0.7 | bdayscore>0.7))
keep $criteria_step5 status_e
merge 1:1 $criteria_step5 using "$temp/g10_p4e_unique.dta", nogen keep(matched)
drop  b_*
cap drop *_b
append using "$temp/g10_doublecheck_p4e.dta"
save "$temp/g10_p5e.dta", replace
count
restore 



// STEP 6: MATCHING STUDENTID + SEX + STREAM + FUZZY NAME/BIRTHDAY/PHONE 
// HAPPENDS IF STUDENTS TRANSFER 
*********************************************************************
gl criteria_step6 studentid sex 

// check uniqueid at baseline and endline 
foreach dataset in b e {
use "$temp/g10_p5`dataset'.dta", clear
cap rename studentid_`dataset' studentid
cap rename phone phone_`dataset'
duplicates tag $criteria_step6, g(temp)

preserve 
keep if temp~=0
drop temp 
save "$temp/g10_doublecheck_p5`dataset'.dta", replace 
restore 

drop if temp~=0
drop temp 
save "$temp/g10_p5`dataset'_unique.dta", replace 
}

use "$temp/g10_p5b_unique.dta", clear 
merge 1:1 $criteria_step6 using "$temp/g10_p5e_unique.dta" 
matchit name_b name_e, g(namescore) 
matchit bday_b bday_e, g(bdayscore) 
matchit phone_b phone_e, g(phonescore) 

// perfectly matched 
preserve 
keep if _merge==3 & (namescore>0.7 | bdayscore>0.7)
drop _merge 
save "$temp/g10_p6m.dta", replace
count
restore 

// unmatched: from baseline survey only
preserve 
keep if _merge==1 | (_merge==3 & ~(namescore>0.7 | bdayscore>0.7))
keep $criteria_step6 status_e
merge 1:1 $criteria_step6 using "$temp/g10_p5b_unique.dta", nogen keep(matched)
drop  b_*
append using "$temp/g10_doublecheck_p5b.dta"
save "$temp/g10_p6b.dta", replace
count
restore 

// unmatched: from endline survey only
preserve 
keep if _merge==2 | (_merge==3 & ~(namescore>0.7 | bdayscore>0.7))
keep $criteria_step6 status_e
merge 1:1 $criteria_step6 using "$temp/g10_p5e_unique.dta", nogen keep(matched)
drop  b_*
append using "$temp/g10_doublecheck_p5e.dta"
save "$temp/g10_p6e.dta", replace
count
restore 



// COMBINING LISTS
******************************************************************
// MATCHED LIST 
clear
forval i=1/6 {
append using "$temp/g10_p`i'm.dta"
cap g dataset = `i'
replace dataset = `i' if missing(dataset)
}
g criteria = ""
replace criteria = "school + studentid + f/name" if dataset==1
replace criteria = "school + birthday + f/name" if dataset==2
replace criteria = "school + birthday + phone" if dataset==3
replace criteria = "school + phone + sex + (fuzzy name | fuzzy birthday)" if dataset==4
replace criteria = "studentid + phone + sex  + (fuzzy name | fuzzy birthday)" if dataset==5
replace criteria = "studentid + sex + (fuzzy name | fuzzy birthday)" if dataset==6
replace schoolname_b = schoolname if inlist(dataset, 1, 2, 3, 4)
replace schoolname_e = schoolname if inlist(dataset, 1, 2, 3, 4)
foreach i in studentid {
	foreach j in b e {
	replace `i'_`j' = `i' if inlist(dataset, 1, 5, 6)
}
}
foreach i in phone {
	foreach j in b e {
	replace `i'_`j' = `i' if inlist(dataset, 3, 4, 5)
}
}
foreach i in sex {
	foreach j in b e {
	replace `i'_`j' = `i' if inlist(dataset, 4, 5, 6)
}
}

count
drop *score
tab treatstudent
keep responseid* studentid* name* sex* schoolname* phone* criteria treatstudent status_e
drop sex schoolname studentid name phone name_b name_e 
save "$temp/grade10_matched_id.dta", replace 


*** REQUIRES MANUAL CHECK
use "$temp/g10_p6b.dta", clear
cap drop name_b name
cap drop b_*
foreach i in responseid schoolname name_original bday phone sex studentid {
	cap rename `i'_b `i'
}
order responseid schoolname name_original sex bday studentid 
lab var bday "birthday: YYYY.MM.DD"
save "$temp/grade10_unmatched_baseline.dta", replace

use "$temp/g10_p6e.dta", clear
cap drop name_e name 
cap drop b_*
foreach i in responseid schoolname name_original bday phone sex studentid {
	cap rename `i'_e `i'
}
order responseid schoolname name_original sex bday studentid 
lab var bday "birthday: YYYY.MM.DD"
save "$temp/grade10_unmatched_endline.dta", replace

/*/ EXPORT TO EXCEL FILES TO ASK CHEKU TO MANUALLY MATCH 
foreach i in baseline endline {
use "$temp/grade10_unmatched_`i'.dta", clear
sort schoolname name_original
g shortname = subinstr(schoolname, " school", "",.)
replace shortname = subinstr(shortname, " secondary", "",.)
replace shortname = subinstr(shortname, " middle", "",.)
replace shortname = subinstr(shortname, " central", "",.)
replace shortname = subinstr(shortname, " higher", "",.)

cap erase "$endline/grade10_`i'_list_manual_match.xlsx"
levelsof shortname, local(schoolist)

foreach school of local schoolist {
	preserve 
	keep if shortname=="`school'"
	drop shortname
	qui export excel using "$endline/grade10_`i'_list_manual_match.xlsx", firstrow(variables) sheet("`school'") sheetreplace cell(A1) 
	restore
}
}
*/

// OPEN MANUAL CHECK LIST BY CHECKU 
use "$temp/grade10_unmatched_baseline.dta", clear
sort schoolname name_original
g shortname = subinstr(schoolname, " school", "",.)
replace shortname = subinstr(shortname, " secondary", "",.)
replace shortname = subinstr(shortname, " middle", "",.)
replace shortname = subinstr(shortname, " central", "",.)
replace shortname = subinstr(shortname, " higher", "",.)
drop if inlist(shortname, "lamgong", "wangbama", "samtengang", "lhuentse")
levelsof shortname, local(schoolist)
foreach school of local schoolist {
	qui import excel using "$raw/manual/match_cheku/grade10_baseline_list_manual_match.xlsx", sheet("`school'") firstrow clear 
	rename _all, lower
	cap rename responseid_endline responseid_e
	cap g remarks = ""
	keep responseid* schoolname name* remarks
	drop if missing(responseid)
	qui save "$temp/file_grade10_`school'", replace  
}

* append data together
global tempfilelist: dir "$temp/" files "file_*.dta"
foreach tfile of global tempfilelist {
append using "$temp/`tfile'", force
erase "$temp/`tfile'"
}

duplicates drop responseid schoolname name_original, force 

preserve 
keep if missing(responseid_e)
merge 1:1 responseid schoolname using "$temp/grade10_unmatched_baseline.dta", keep(matched) nogen
foreach i in responseid schoolname name_original sex bday studentid phone {
	rename `i' `i'_b
}
save "$temp/grade10_unmatched_baseline_after_manualmatch.dta", replace 
restore

keep if ~missing(responseid_e)

* merge with baseline data 
merge 1:1 responseid schoolname using "$temp/grade10_unmatched_baseline.dta", keep(matched) nogen 
foreach i in responseid schoolname name_original sex bday studentid phone {
	rename `i' `i'_b
}
* merge with endline data
rename responseid_e responseid
merge 1:1 responseid using "$temp/grade10_unmatched_endline.dta", keep(matched) nogen 
foreach i in responseid schoolname name_original sex bday studentid phone {
	cap rename `i' `i'_e
}
g criteria = "manual matching by Cheku's team"
keep responseid_b name_original_b sex_b phone_b treatstudent responseid_e name_original_e sex_e phone_e studentid_b studentid_e schoolname_b schoolname_e criteria status_e

append using "$temp/grade10_matched_id.dta"
save "$temp/grade10_matched_final.dta", replace 




use "$temp/g10_p0b.dta", clear
foreach i in responseid schoolname name_original sex bday studentid phone {
	cap rename `i' `i'_b
}
merge 1:1 responseid_b schoolname_b using "$temp/grade10_matched_final.dta"
g followup=1 if _merge==3 
replace followup=0 if _merge==1
drop _merge 

g note="need to check if grade 10 student" if followup==0 & inlist(schoolname_b, "lamgong middle secondary school", "samtengang central school", "wangbama central school")

save "$endline/grade10_followup_track.dta", replace 

* erase temporary datasets
forval i=1/5 {
global filelist: dir "$temp/" files "g10_*p`i'*.dta"
foreach tfile of global filelist {
    erase "$temp/`tfile'"
}
}

/*/ Ask Cheku to check for grade information
use "$endline/grade10_followup_track.dta", clear
keep if followup==0 & inlist(schoolname_b, "lamgong middle secondary school", "samtengang central school", "wangbama central school")
sort schoolname_b responseid_b
keep responseid_b schoolname_b name_original_b sex_b b_day b_month b_year studentid_b phone_b note
order responseid_b schoolname_b name_original_b b_day b_month b_year sex_b studentid_b phone_b note
export excel using "$endline/grade10_followup_track_check_grade.xlsx", replace firstrow("variables")
*/

// Incorporate grade information from Cheku 
clear 
import excel using "$raw/manual/grade10_followup_track_check_grade.xlsx", firstrow
keep responseid_b schoolname_b grade910academicyear2021
keep if grade910academicyear2021=="10"
drop grade910academicyear2021
merge 1:1 responseid_b schoolname_b using "$endline/grade10_followup_track.dta", keep(matched) nogen
tempfile grade10
save `grade10', replace 

use "$endline/grade10_followup_track.dta", clear 
drop if followup==0 & inlist(schoolname_b, "lamgong middle secondary school", "samtengang central school", "wangbama central school")
append using `grade10'

// Save as current list 
drop note
sort schoolname_b 

* list of not-yet-surveyed students
preserve 
keep if missing(responseid_e)
tempfile notsurvey
save `notsurvey', replace
restore 

* list of surveyed students - to check if duplicates 
drop if missing(responseid_e)
duplicates tag responseid_e, g(temp)
duplicates drop responseid_e, force 
drop temp 
append using `notsurvey'
save "$endline/grade10_endline_$date.dta", replace 


// CHECK DATA QUALITY 
// COMPUTE FOLLOWUP RATES BY SCHOOL, BY DISTRICT, BY STUDENT STATUS 
use "$endline/grade10_endline_$date.dta", clear 
keep if ~missing(responseid_e)
unique responseid_e
duplicates tag responseid_e, g(temp)
keep responseid_b schoolname_b responseid_e
tempfile matching_id
save `matching_id', replace 

clear
import delimited  "$endlinecsv", clear
rename responseid responseid_e
merge 1:1 responseid_e using `matching_id', keep(matched) nogen
keep if status=="Partial"
tempfile partial
save `partial', replace 

use "$endline/grade10_endline_$date.dta", clear
preserve 
keep if missing(responseid_e)
tempfile notsurvey
save `notsurvey', replace
restore

drop if missing(responseid_e)
merge 1:1 responseid_e using `partial'
replace followup=2 if _merge==3
drop _merge 

append using `notsurvey'
rename schoolname_b schoolname 
merge m:1 schoolname using "$clean/list_of_schools_endline.dta", keep(matched) nogen keepusing(district)
rename schoolname schoolname_b

save "$endline/grade10_endline_$date.dta", replace 


use "$endline/grade10_endline_$date.dta", clear 
* total students at baseline 
bys schoolname_b: egen totstudent_b = total(~missing(responseid_b))

* total students can be followed up: 
bys schoolname_b: egen totstudent_e = total(followup==1|followup==2)
* total students can be followed up: complete submission
bys schoolname_b: egen totstudent_ec = total(followup==1)
* total students can be followed up: partial submission
bys schoolname_b: egen totstudent_ep = total(followup==2)

* total treatstudents at baseline
bys schoolname_b: egen totstudent_bt = total(treatstudent==1)
* total nontreatstudents at baseline
bys schoolname_b: egen totstudent_bn = total(treatstudent==0)

* total treatstudents at baseline that can be followed up:
bys schoolname_b: egen totstudent_et = total(treatstudent==1 & (followup==1|followup==2))
* total treatstudents at baseline that can be followed up: complete submission
bys schoolname_b: egen totstudent_ect = total(treatstudent==1 & followup==1)
* total treatstudents at baseline that can be followed up: partial submission
bys schoolname_b: egen totstudent_ept = total(treatstudent==1 & followup==2)

* total nontreatstudents at baseline that can be followed up:
bys schoolname_b: egen totstudent_en = total(treatstudent==0 & (followup==1|followup==2))
* total nontreatstudents at baseline that can be followed up: complete submission 
bys schoolname_b: egen totstudent_ecn = total(treatstudent==0 & followup==1)
* total nontreatstudents at baseline that can be followed up: complete submission 
bys schoolname_b: egen totstudent_epn = total(treatstudent==0 & followup==2)

duplicates drop schoolname_b, force

* follow-up rate by school
g followrate = totstudent_e/totstudent_b*100
lab var followrate "followup rate at school level"
g followrate_c = totstudent_ec/totstudent_b*100
lab var followrate "followup rate at school level: complete submission"
g followrate_p = totstudent_ep/totstudent_b*100
lab var followrate "followup rate at school level: partial submission"


// summary table of overall survey rate by school groups
preserve 
collapse (sum) totstudent_*, by(treatschool)
qui {
summ totstudent_bt if treatschool==1
scalar A = r(mean)
summ totstudent_bn if treatschool==1
scalar B = r(mean)
summ totstudent_bn if treatschool==0
scalar C = r(mean)

summ totstudent_et if treatschool==1
scalar D = r(mean)
summ totstudent_en if treatschool==1
scalar E = r(mean)
summ totstudent_en if treatschool==0
scalar F = r(mean)

summ totstudent_ect if treatschool==1
scalar G = r(mean)
summ totstudent_ecn if treatschool==1
scalar H = r(mean)
summ totstudent_ecn if treatschool==0
scalar I = r(mean)
}

mat overall = (A, B, C \ D, E, F \ G, H, I \ D/A*100, E/B*100, F/C*100 \ G/A*100, H/B*100, I/C*100)
mat list overall
qui svmat overall
keep overall*
g variable=""
replace variable = "total students" if _n==1
replace variable = "total surveyed" if _n==2
replace variable = "total surveyed: complete submission" if _n==3
replace variable = "follow-up rate (%)" if _n==4
replace variable = "follow-up rate (%): complete submission" if _n==5
order variable overall*
rename overall1 treated_treatschool
rename overall2 untreated_treatschool
rename overall3 untreated_school 
export excel using "$endline/grade10_endline_tracking_$date.xlsx", firstrow(variables) sheet("overall") sheetreplace cell(B2) 
restore 

// summary table of survey rates for treated students, nontreated students by school strata 
preserve 
collapse (sum) totstudent_*, by(treatschool geoprisize)
qui {
forval i=1/13 {	
summ totstudent_bt if treatschool==1 & geoprisize==`i'
scalar A`i' = r(mean)
summ totstudent_bn if treatschool==1 & geoprisize==`i'
scalar B`i' = r(mean)
summ totstudent_bn if treatschool==0 & geoprisize==`i'
scalar C`i' = r(mean)
summ totstudent_et if treatschool==1 & geoprisize==`i'
scalar D`i' = r(mean)
summ totstudent_en if treatschool==1 & geoprisize==`i'
scalar E`i' = r(mean)
summ totstudent_en if treatschool==0 & geoprisize==`i'
scalar F`i' = r(mean)
mat t`i' = (A`i', B`i', C`i' \ D`i', E`i', F`i' \ D`i'/A`i'*100, E`i'/B`i'*100, F`i'/C`i'*100)
}
}
mat overall = (t1/t2/t3/t4/t5/t6/t7/t8/t9/t10/t11/t12/t13)
mat list overall
qui svmat overall
keep overall*
g variable=""
replace variable = "total students" if mod(_n,3)==1
replace variable = "total surveyed" if mod(_n,3)==2
replace variable = "follow-up rate (%)" if mod(_n,3)==0
g geoprisize=.
forval i=1/13 {
	replace geoprisize=`i' if int((_n+2)/3)==`i'
}
order geoprisize variable overall*
rename overall1 treated_treatschool
rename overall2 untreated_treatschool
rename overall3 untreated_school 
export excel using "$endline/grade10_endline_tracking_$date.xlsx", firstrow(variables) sheet("by_strata") sheetreplace cell(B2) 
restore 

// summary table of survey rates for treated students, nontreated students by district  
preserve 
collapse (sum) totstudent_*, by(treatschool district)
sort treatschool district 
g n_district=_n
replace n_district=_n-11 if _n>=12
qui {
forval i=1/11 {	
cap summ totstudent_bt if treatschool==1 & n_district==`i'
scalar A`i' = r(mean)
summ totstudent_bn if treatschool==1 & n_district==`i'
scalar B`i' = r(mean)
summ totstudent_bn if treatschool==0 & n_district==`i'
scalar C`i' = r(mean)
summ totstudent_et if treatschool==1 & n_district==`i'
scalar D`i' = r(mean)
summ totstudent_en if treatschool==1 & n_district==`i'
scalar E`i' = r(mean)
summ totstudent_en if treatschool==0 & n_district==`i'
scalar F`i' = r(mean)
mat t`i' = (A`i', B`i', C`i' \ D`i', E`i', F`i' \ D`i'/A`i'*100, E`i'/B`i'*100, F`i'/C`i'*100)
}
}
mat overall = (t1/t2/t3/t4/t5/t6/t7/t8/t9/t10/t11)
mat list overall
qui svmat overall
keep overall*
g variable=""
replace variable = "total students" if mod(_n,3)==1
replace variable = "total surveyed" if mod(_n,3)==2
replace variable = "follow-up rate (%)" if mod(_n,3)==0
g n_district=.
forval i=1/11 {
	replace n_district=`i' if int((_n+2)/3)==`i'
}
g district=""
replace district="Chhukha" if n_district==1
replace district="Lhuentse" if n_district==2
replace district="Mongar" if n_district==3
replace district="Paro" if n_district==4
replace district="Punakha" if n_district==5
replace district="Samdrup Jongkhar" if n_district==6
replace district="Samtse" if n_district==7
replace district="Thimphu" if n_district==8
replace district="Trashigang" if n_district==9
replace district="Trashiyangtse" if n_district==10
replace district="Wangdue Phodrang" if n_district==11
drop n_district
order district variable overall*
rename overall1 treated_treatschool
rename overall2 untreated_treatschool
rename overall3 untreated_school 
export excel using "$endline/grade10_endline_tracking_$date.xlsx", firstrow(variables) sheet("by_district") sheetreplace cell(B2) 
restore 

// summary table of survey rates for treated students, nontreated students by each school 
* survey rate for treat students at school level 
g followrate_treated = totstudent_et/totstudent_bt*100
lab var followrate_treated "survey rate for treat students at school level"

g followrate_ntreated = totstudent_en/totstudent_bn*100
lab var followrate_ntreated "survey rate for nontreat students at school level"

keep district schoolname_b followrate* treatschool geoprisize totstudent_*
rename totstudent_b students_at_baseline
rename totstudent_e students_did_endline
rename totstudent_bt treated_students_at_baseline
rename totstudent_bn nontreated_students_at_baseline
rename totstudent_et treated_students_did_endline 
rename totstudent_en nontreated_students_did_endline
drop tot*
drop followrate_c followrate_p 
order district schoolname_b followrate* treatschool geoprisize
sort district schoolname_b 

export excel using "$endline/grade10_endline_tracking_$date.xlsx", firstrow(variables) sheet("by_school") sheetreplace cell(B2) 


// LIST FOR FOLLOW_UP SURVEY IN TARGETED SCHOOLS 
clear
import excel using "$endline/grade10_endline_tracking_$date.xlsx", sheet("by_school") firstrow 
merge 1:m schoolname_b using "$endline/grade10_endline_$date.dta", nogen 
cap erase "$endline/grade10_list_for_followup_$date.xlsx"
keep if followup==0
keep responseid_b name_original_b sex_b schoolname_b b_day b_month b_year studentid_b email phone_b treatstudent treatschool followrate followrate_treated followrate_ntreated
order responseid_b schoolname_b name_original_b sex_b b_day b_month b_year studentid_b email phone_b treatstudent treatschool followrate followrate_treated followrate_ntreated
sort followrate followrate_treated followrate_ntreated schoolname_b 
g note = "have not done followup survey/poor quality data"
sort followrate followrate_treated followrate_ntreated schoolname_b 

export excel using "$endline/grade10_target_list_for_followup_$date.xlsx", firstrow(variables) sheet("Summary") sheetreplace cell(A1) 

g shortname = subinstr(schoolname_b, " school", "",.)
replace shortname = subinstr(shortname, " secondary", "",.)
replace shortname = subinstr(shortname, " middle", "",.)
replace shortname = subinstr(shortname, " central", "",.)
replace shortname = subinstr(shortname, " higher", "",.)

levelsof shortname, local(schoolist)

foreach school of local schoolist {
	preserve 
	keep if shortname=="`school'"
	drop shortname
	qui export excel using "$endline/grade10_target_list_for_followup_$date.xlsx", firstrow(variables) sheet("`school'") sheetreplace cell(A1) 
	restore
}

	

